For the reader: You can click the black “code” box on the right hand side to either see or hide the R code behind analysis results and graphs.

The results are arranged to separate tabs by site/area, so that it’s easy to look at just the results of the site of interest, instead of scrolling accidentally down to a different site and getting confused.

The results for each site consist of four parts: pXRF, particle size distribution, loss on ignition, and colorimetry.

Ashdod-Yam

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);

library(GGally); 
library(corrplot); 
library(tidyr); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AY.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Turn any "LOD":s and missing values to "NA":s to allow scaling
values <- c("MgO", "Al2O3", "SiO2", 
            "S", "Cl", "K2O", "CaO", "Ti", "V", "Cr", 
            "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As", 
            "Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", 
            "Rh", "Pd", "Ag", "Cd", "Sn", "Sb", 
            "La", "Ce", "Hf", "Ta", "W", "Pt", "Au", 
            "Hg", "Tl", "Pb", "Bi", "Th", "U")

pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)

# Scaling the data with standard z-score method
scaled_pxrf <- as.data.frame(scale(pxrf_NA))

# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(scaled_pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"

# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)

# Removing all columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)

# Removing all columns that contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)

pxrf_no_na:

summary(pxrf_no_na)
##                Area             Type        Al2O3               SiO2           
##  Area D Acropolis:19   mud mortar : 1   Min.   :-1.20092   Min.   :-1.3142038  
##  Area D Wall     :22   mud plaster: 5   1st Qu.:-0.49712   1st Qu.:-0.4978527  
##  Builder's floor : 5   mudbrick   :35   Median :-0.23740   Median :-0.0003512  
##  Hellenistic     : 1   PEM        : 6   Mean   : 0.04576   Mean   : 0.0389010  
##                                         3rd Qu.: 0.54615   3rd Qu.: 0.5568590  
##                                         Max.   : 1.88920   Max.   : 1.3770734  
##       K2O                CaO                 Ti                 Mn          
##  Min.   :-1.08731   Min.   :-1.56460   Min.   :-1.17385   Min.   :-1.00032  
##  1st Qu.:-0.54550   1st Qu.:-0.45677   1st Qu.:-0.47636   1st Qu.:-0.50265  
##  Median :-0.02978   Median : 0.07788   Median :-0.04881   Median :-0.13876  
##  Mean   : 0.04621   Mean   : 0.06819   Mean   : 0.04264   Mean   : 0.03671  
##  3rd Qu.: 0.39368   3rd Qu.: 0.64823   3rd Qu.: 0.53925   3rd Qu.: 0.44988  
##  Max.   : 2.35094   Max.   : 1.91455   Max.   : 2.05476   Max.   : 1.93487  
##        Fe                 Cu                 Zn                 Rb          
##  Min.   :-1.10643   Min.   :-1.50957   Min.   :-1.03131   Min.   :-1.28223  
##  1st Qu.:-0.49989   1st Qu.:-0.49095   1st Qu.:-0.61933   1st Qu.:-0.54198  
##  Median :-0.09144   Median : 0.18813   Median :-0.20735   Median :-0.13073  
##  Mean   : 0.04543   Mean   : 0.04846   Mean   : 0.02864   Mean   : 0.04121  
##  3rd Qu.: 0.46449   3rd Qu.: 0.64086   3rd Qu.: 0.29970   3rd Qu.: 0.68149  
##  Max.   : 2.31412   Max.   : 1.88584   Max.   : 2.45465   Max.   : 1.92553  
##        Sr                 Zr          
##  Min.   :-1.85766   Min.   :-1.61460  
##  1st Qu.:-0.41969   1st Qu.:-0.60738  
##  Median : 0.10393   Median : 0.05199  
##  Mean   : 0.07807   Mean   : 0.05398  
##  3rd Qu.: 0.51031   3rd Qu.: 0.55041  
##  Max.   : 2.08896   Max.   : 2.11835

pxrf_all:

summary(pxrf_all)
##                Area             Type         MgO               Al2O3         
##  Area D Acropolis:19   mud mortar : 1   Min.   :-1.23821   Min.   :-1.20092  
##  Area D Wall     :22   mud plaster: 5   1st Qu.:-0.43843   1st Qu.:-0.49712  
##  Builder's floor : 5   mudbrick   :35   Median : 0.02957   Median :-0.23740  
##  Hellenistic     : 1   PEM        : 6   Mean   : 0.03914   Mean   : 0.04576  
##                                         3rd Qu.: 0.42944   3rd Qu.: 0.54615  
##                                         Max.   : 1.75657   Max.   : 1.88920  
##                                         NA's   :1                            
##       SiO2                  S                 Cl                K2O          
##  Min.   :-1.3142038   Min.   :-0.2340   Min.   :-0.88004   Min.   :-1.08731  
##  1st Qu.:-0.4978527   1st Qu.:-0.2328   1st Qu.:-0.51354   1st Qu.:-0.54550  
##  Median :-0.0003512   Median :-0.2107   Median :-0.10742   Median :-0.02978  
##  Mean   : 0.0389010   Mean   :-0.2000   Mean   : 0.01773   Mean   : 0.04621  
##  3rd Qu.: 0.5568590   3rd Qu.:-0.1779   3rd Qu.: 0.17488   3rd Qu.: 0.39368  
##  Max.   : 1.3770734   Max.   :-0.1447   Max.   : 3.94139   Max.   : 2.35094  
##                       NA's   :43        NA's   :1                            
##       CaO                 Ti                 V                  Mn          
##  Min.   :-1.56460   Min.   :-1.17385   Min.   :-0.98017   Min.   :-1.00032  
##  1st Qu.:-0.45677   1st Qu.:-0.47636   1st Qu.:-0.26027   1st Qu.:-0.50265  
##  Median : 0.07788   Median :-0.04881   Median : 0.08755   Median :-0.13876  
##  Mean   : 0.06819   Mean   : 0.04264   Mean   : 0.08074   Mean   : 0.03671  
##  3rd Qu.: 0.64823   3rd Qu.: 0.53925   3rd Qu.: 0.42728   3rd Qu.: 0.44988  
##  Max.   : 1.91455   Max.   : 2.05476   Max.   : 1.57588   Max.   : 1.93487  
##                                        NA's   :28                           
##        Fe                 Ni                 Cu                 Zn          
##  Min.   :-1.10643   Min.   :-1.10296   Min.   :-1.50957   Min.   :-1.03131  
##  1st Qu.:-0.49989   1st Qu.:-0.44318   1st Qu.:-0.49095   1st Qu.:-0.61933  
##  Median :-0.09144   Median :-0.05642   Median : 0.18813   Median :-0.20735  
##  Mean   : 0.04543   Mean   : 0.01430   Mean   : 0.04846   Mean   : 0.02864  
##  3rd Qu.: 0.46449   3rd Qu.: 0.53510   3rd Qu.: 0.64086   3rd Qu.: 0.29970  
##  Max.   : 2.31412   Max.   : 1.71814   Max.   : 1.88584   Max.   : 2.45465  
##                     NA's   :1                                               
##        As                Rb                 Sr                 Y           
##  Min.   :-0.8953   Min.   :-1.28223   Min.   :-1.85766   Min.   :-1.16797  
##  1st Qu.:-0.2961   1st Qu.:-0.54198   1st Qu.:-0.41969   1st Qu.:-0.41036  
##  Median : 0.2033   Median :-0.13073   Median : 0.10393   Median :-0.05384  
##  Mean   : 0.2832   Mean   : 0.04121   Mean   : 0.07807   Mean   : 0.04866  
##  3rd Qu.: 0.6028   3rd Qu.: 0.68149   3rd Qu.: 0.51031   3rd Qu.: 0.39182  
##  Max.   : 2.3007   Max.   : 1.92553   Max.   : 2.08896   Max.   : 2.26357  
##  NA's   :27                                              NA's   :2         
##        Zr                 Nb          
##  Min.   :-1.61460   Min.   :-0.89312  
##  1st Qu.:-0.60738   1st Qu.:-0.48298  
##  Median : 0.05199   Median :-0.07283  
##  Mean   : 0.05398   Mean   : 0.06816  
##  3rd Qu.: 0.55041   3rd Qu.: 0.43985  
##  Max.   : 2.11835   Max.   : 2.18298  
##                     NA's   :31

pXRF: correlations

# Drop the more dubious elements
pxrf_1 <- pxrf_no_na %>% select(-Al2O3)
pxrf_1 <- pxrf_1 %>% select(-CaO)

# Visualize correlations between elements
cor(pxrf_1[3:12]) %>% corrplot (type = "lower", tl.cex = 1, tl.pos="d", cl.pos="r")

# Optimal number of clusters in K-means analysis: 2
k_max <- 8
twcss <- sapply(1:k_max, function(k){kmeans(pxrf_1[3:12], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')

# K-means clustering in ggpairs: 2 clusters
km <-kmeans(pxrf_1[3:12], centers = 2)
cluster_x <- as.factor(km$cluster)
ggpairs(pxrf_1, mapping = aes(col = cluster_x))

pXRF: PCA

PCA with only the no-NA:s elements:

# Elements with no NA values at all
colnames(pxrf_no_na)
##  [1] "Area"  "Type"  "Al2O3" "SiO2"  "K2O"   "CaO"   "Ti"    "Mn"    "Fe"   
## [10] "Cu"    "Zn"    "Rb"    "Sr"    "Zr"
# PCA
pca_1 <- prcomp(pxrf_1[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     1.9037 1.0026 0.8270 0.67213 0.37446 0.35729 0.29419
## Proportion of Variance 0.5809 0.1611 0.1096 0.07242 0.02248 0.02046 0.01387
## Cumulative Proportion  0.5809 0.7421 0.8517 0.92413 0.94661 0.96707 0.98095
##                            PC8     PC9    PC10
## Standard deviation     0.26346 0.16502 0.14906
## Proportion of Variance 0.01113 0.00437 0.00356
## Cumulative Proportion  0.99207 0.99644 1.00000
# PCA1 plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam elements")

autoplot(pca_1, data=pxrf_1, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by area")

autoplot(pca_1, data=pxrf_1, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by sample type")

PCA with all the feasible elements:

# Elements that have at least one viable value
colnames(pxrf_all)
##  [1] "Area"  "Type"  "MgO"   "Al2O3" "SiO2"  "S"     "Cl"    "K2O"   "CaO"  
## [10] "Ti"    "V"     "Mn"    "Fe"    "Ni"    "Cu"    "Zn"    "As"    "Rb"   
## [19] "Sr"    "Y"     "Zr"    "Nb"
# PCA with NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0

pca_2 <- prcomp(pxrf_all[3:22])
summary(pca_2)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.1392 1.2623 1.1731 0.77694 0.75938 0.56778 0.50254
## Proportion of Variance 0.4455 0.1551 0.1340 0.05877 0.05614 0.03138 0.02459
## Cumulative Proportion  0.4455 0.6006 0.7346 0.79337 0.84951 0.88090 0.90548
##                            PC8     PC9   PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.44075 0.42941 0.3738 0.33174 0.29859 0.26048 0.24174
## Proportion of Variance 0.01891 0.01795 0.0136 0.01071 0.00868 0.00661 0.00569
## Cumulative Proportion  0.92439 0.94235 0.9559 0.96666 0.97534 0.98195 0.98764
##                           PC15    PC16    PC17    PC18    PC19    PC20
## Standard deviation     0.22407 0.16224 0.16055 0.12292 0.08709 0.04448
## Proportion of Variance 0.00489 0.00256 0.00251 0.00147 0.00074 0.00019
## Cumulative Proportion  0.99253 0.99509 0.99760 0.99907 0.99981 1.00000
# PCA2 plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam more elements")

autoplot(pca_1, data=pxrf_1, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam more elements grouped by area")

autoplot(pca_1, data=pxrf_1, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam more elements grouped by sample type")

pXRF: HCA

# HCA

# compute divisive hierarchical clustering
hc4 <- diana(pxrf_1[3:12])

# Divise coefficient; amount of clustering structure found
hc4$dc
## [1] 0.8007861
# Diana dendrogram
pltree(hc4, cex = 1, hang = -1, main = "Dendrogram of diana")

# HCA dendrogam 1
dend <- 
    pxrf_1[3:12] %>%                    # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    set("branches_k_color", k=5) %>% 
    set("branches_lwd", 1.2) %>%
    set("labels_col", c("red", "black", "blue"), k=5) %>%    
    set("labels_cex", 1) %>% 
    set("leaves_pch", NA) 
# plot the dend in usual "base" plotting engine:
plot(dend)

# HCA dendrogam 2: 
HCA.ward5 <- hclust(dist(pxrf_1[3:12], method="euclid"), method="ward.D2")
plot(HCA.ward5, main="Ward's method")
rect.hclust(HCA.ward5, k=5)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AY.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=2)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

# Libraries
library(dplyr); 
library(GGally); 
library(corrplot); 
library(tidyr); 
library(openxlsx); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
library(ggtern) # ternary plots

Preparing data

loi <- read.xlsx("data/loi_AY.xlsx", sep=";")
tga <- read.xlsx("data/tga_AY.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:52          Min.   :8.079   Min.   :10.13   Min.   :1.538  
##  Class :character   1st Qu.:8.566   1st Qu.:11.00   1st Qu.:2.464  
##  Mode  :character   Median :9.006   Median :11.71   Median :2.635  
##                     Mean   :8.994   Mean   :11.55   Mean   :2.559  
##                     3rd Qu.:9.435   3rd Qu.:12.05   3rd Qu.:2.824  
##                     Max.   :9.925   Max.   :12.68   Max.   :3.043  
##    dry_weight     after_550_C     after_950_C      context         
##  Min.   :10.11   Min.   :10.08   Min.   :10.05   Length:52         
##  1st Qu.:10.97   1st Qu.:10.94   1st Qu.:10.86   Class :character  
##  Median :11.67   Median :11.62   Median :11.53   Mode  :character  
##  Mean   :11.52   Mean   :11.47   Mean   :11.41                     
##  3rd Qu.:12.00   3rd Qu.:11.96   3rd Qu.:11.93                     
##  Max.   :12.65   Max.   :12.60   Max.   :12.52                     
##      type          
##  Length:52         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:53          Length:53          Length:53          Length:53         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950        
##  Length:53          Length:53          Length:53         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(loi$c950)
## $stats
## [1] 0.398150 1.528018 2.198513 3.162314 4.889189
## 
## $n
## [1] 52
## 
## $conf
## [1] 1.840428 2.556598
## 
## $out
## [1]  8.041755 11.960444
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)

# Removing duplicates from the next graph
rownames(loi)
##  [1] "AY-1"    "AY-2"    "AY-3"    "AY-4"    "AY-5"    "AY-6"    "AY-7"   
##  [8] "AY-8"    "AY-9"    "AY-10"   "AY-11"   "AY-12"   "AY-13"   "AY-14"  
## [15] "AY-15"   "AY-16"   "AY-17"   "AY-18"   "AY-19"   "AY-20"   "AY-21"  
## [22] "AY-22"   "AY-23"   "AY-24"   "AY-25"   "AY-26"   "AY-27"   "AY-28"  
## [29] "AY-29"   "AY-30"   "AY-31"   "AY-32"   "AY-33"   "AY-34"   "AY-35"  
## [36] "AY-36"   "AY-37"   "AY-38"   "AY-39"   "AY-40"   "AY-41"   "AY-42"  
## [43] "AY-50"   "AY-51"   "AY-52"   "AY-53"   "AY-54"   "AY-50_2" "AY-51_2"
## [50] "AY-52_2" "AY-53_2" "AY-54_2"
loi <- subset(loi[1:47, ])

ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(type), colour = factor(context))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(loi$c550)

barplot(loi$c950)

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(tga$c550, tga$c950)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(tga$c950)
## $stats
## [1] 0.7233976 1.5855926 2.3958197 3.4621816 5.4590326
## 
## $n
## [1] 50
## 
## $conf
## [1] 1.976504 2.815135
## 
## $out
## [1] 7.994718
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])

ggplot(tga, 
      aes(c550, c950, label = rownames(tga))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(tga$c550)

barplot(tga$c950)

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AY.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :37.56   Min.   : 7.97   Min.   :16.81  
##  1st Qu.:42.13   1st Qu.: 9.72   1st Qu.:19.76  
##  Median :43.64   Median :10.38   Median :21.18  
##  Mean   :43.92   Mean   :10.49   Mean   :21.22  
##  3rd Qu.:45.91   3rd Qu.:11.29   3rd Qu.:22.34  
##  Max.   :48.24   Max.   :13.53   Max.   :25.03
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Ashdod-Yam: Byzantine

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

pxrf_no_na:

summary(pxrf_no_na)
##                Area             Type        Al2O3               SiO2           
##  Area D Acropolis:19   mud mortar : 1   Min.   :-1.20092   Min.   :-1.3142038  
##  Area D Wall     :22   mud plaster: 5   1st Qu.:-0.49712   1st Qu.:-0.4978527  
##  Builder's floor : 5   mudbrick   :35   Median :-0.23740   Median :-0.0003512  
##  Hellenistic     : 1   PEM        : 6   Mean   : 0.04576   Mean   : 0.0389010  
##                                         3rd Qu.: 0.54615   3rd Qu.: 0.5568590  
##                                         Max.   : 1.88920   Max.   : 1.3770734  
##       K2O                CaO                 Ti                 Mn          
##  Min.   :-1.08731   Min.   :-1.56460   Min.   :-1.17385   Min.   :-1.00032  
##  1st Qu.:-0.54550   1st Qu.:-0.45677   1st Qu.:-0.47636   1st Qu.:-0.50265  
##  Median :-0.02978   Median : 0.07788   Median :-0.04881   Median :-0.13876  
##  Mean   : 0.04621   Mean   : 0.06819   Mean   : 0.04264   Mean   : 0.03671  
##  3rd Qu.: 0.39368   3rd Qu.: 0.64823   3rd Qu.: 0.53925   3rd Qu.: 0.44988  
##  Max.   : 2.35094   Max.   : 1.91455   Max.   : 2.05476   Max.   : 1.93487  
##        Fe                 Cu                 Zn                 Rb          
##  Min.   :-1.10643   Min.   :-1.50957   Min.   :-1.03131   Min.   :-1.28223  
##  1st Qu.:-0.49989   1st Qu.:-0.49095   1st Qu.:-0.61933   1st Qu.:-0.54198  
##  Median :-0.09144   Median : 0.18813   Median :-0.20735   Median :-0.13073  
##  Mean   : 0.04543   Mean   : 0.04846   Mean   : 0.02864   Mean   : 0.04121  
##  3rd Qu.: 0.46449   3rd Qu.: 0.64086   3rd Qu.: 0.29970   3rd Qu.: 0.68149  
##  Max.   : 2.31412   Max.   : 1.88584   Max.   : 2.45465   Max.   : 1.92553  
##        Sr                 Zr          
##  Min.   :-1.85766   Min.   :-1.61460  
##  1st Qu.:-0.41969   1st Qu.:-0.60738  
##  Median : 0.10393   Median : 0.05199  
##  Mean   : 0.07807   Mean   : 0.05398  
##  3rd Qu.: 0.51031   3rd Qu.: 0.55041  
##  Max.   : 2.08896   Max.   : 2.11835

pxrf_all:

summary(pxrf_all)
##                Area             Type         MgO                Al2O3         
##  Area D Acropolis:19   mud mortar : 1   Min.   :-1.238209   Min.   :-1.20092  
##  Area D Wall     :22   mud plaster: 5   1st Qu.:-0.422347   1st Qu.:-0.49712  
##  Builder's floor : 5   mudbrick   :35   Median : 0.004755   Median :-0.23740  
##  Hellenistic     : 1   PEM        : 6   Mean   : 0.038303   Mean   : 0.04576  
##                                         3rd Qu.: 0.429173   3rd Qu.: 0.54615  
##                                         Max.   : 1.756566   Max.   : 1.88920  
##       SiO2                  S                  Cl                K2O          
##  Min.   :-1.3142038   Min.   :-0.23396   Min.   :-0.88004   Min.   :-1.08731  
##  1st Qu.:-0.4978527   1st Qu.: 0.00000   1st Qu.:-0.50859   1st Qu.:-0.54550  
##  Median :-0.0003512   Median : 0.00000   Median :-0.10495   Median :-0.02978  
##  Mean   : 0.0389010   Mean   :-0.01702   Mean   : 0.01735   Mean   : 0.04621  
##  3rd Qu.: 0.5568590   3rd Qu.: 0.00000   3rd Qu.: 0.16497   3rd Qu.: 0.39368  
##  Max.   : 1.3770734   Max.   : 0.00000   Max.   : 3.94139   Max.   : 2.35094  
##       CaO                 Ti                 V                  Mn          
##  Min.   :-1.56460   Min.   :-1.17385   Min.   :-0.98017   Min.   :-1.00032  
##  1st Qu.:-0.45677   1st Qu.:-0.47636   1st Qu.: 0.00000   1st Qu.:-0.50265  
##  Median : 0.07788   Median :-0.04881   Median : 0.00000   Median :-0.13876  
##  Mean   : 0.06819   Mean   : 0.04264   Mean   : 0.03264   Mean   : 0.03671  
##  3rd Qu.: 0.64823   3rd Qu.: 0.53925   3rd Qu.: 0.00000   3rd Qu.: 0.44988  
##  Max.   : 1.91455   Max.   : 2.05476   Max.   : 1.57588   Max.   : 1.93487  
##        Fe                 Ni                 Cu                 Zn          
##  Min.   :-1.10643   Min.   :-1.10296   Min.   :-1.50957   Min.   :-1.03131  
##  1st Qu.:-0.49989   1st Qu.:-0.42043   1st Qu.:-0.49095   1st Qu.:-0.61933  
##  Median :-0.09144   Median :-0.01092   Median : 0.18813   Median :-0.20735  
##  Mean   : 0.04543   Mean   : 0.01400   Mean   : 0.04846   Mean   : 0.02864  
##  3rd Qu.: 0.46449   3rd Qu.: 0.53510   3rd Qu.: 0.64086   3rd Qu.: 0.29970  
##  Max.   : 2.31412   Max.   : 1.71814   Max.   : 1.88584   Max.   : 2.45465  
##        As                Rb                 Sr                 Y           
##  Min.   :-0.8953   Min.   :-1.28223   Min.   :-1.85766   Min.   :-1.16797  
##  1st Qu.: 0.0000   1st Qu.:-0.54198   1st Qu.:-0.41969   1st Qu.:-0.41036  
##  Median : 0.0000   Median :-0.13073   Median : 0.10393   Median : 0.00000  
##  Mean   : 0.1205   Mean   : 0.04121   Mean   : 0.07807   Mean   : 0.04659  
##  3rd Qu.: 0.0000   3rd Qu.: 0.68149   3rd Qu.: 0.51031   3rd Qu.: 0.39182  
##  Max.   : 2.3007   Max.   : 1.92553   Max.   : 2.08896   Max.   : 2.26357  
##        Zr                 Nb         
##  Min.   :-1.61460   Min.   :-0.8931  
##  1st Qu.:-0.60738   1st Qu.: 0.0000  
##  Median : 0.05199   Median : 0.0000  
##  Mean   : 0.05398   Mean   : 0.0232  
##  3rd Qu.: 0.55041   3rd Qu.: 0.0000  
##  Max.   : 2.11835   Max.   : 2.1830

pXRF: Elemental correlations and K means cluster analysis

pXRF: PCA

PCA with only the no-NA:s elements:

PCA with all the feasible elements:

pXRF: HCA

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AYB.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

# Libraries
library(dplyr); 
library(GGally); 
library(corrplot); 
library(tidyr); 
library(openxlsx); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
library(ggtern) # ternary plots

Preparing data

loi <- read.xlsx("data/loi_AYB.xlsx", sep=";")
tga <- read.xlsx("data/tga_AYB.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:7           Min.   :8.382   Min.   :10.92   Min.   :2.257  
##  Class :character   1st Qu.:8.555   1st Qu.:11.39   1st Qu.:2.613  
##  Mode  :character   Median :8.666   Median :11.50   Median :2.880  
##                     Mean   :8.943   Mean   :11.68   Mean   :2.739  
##                     3rd Qu.:9.442   3rd Qu.:12.13   3rd Qu.:2.905  
##                     Max.   :9.560   Max.   :12.31   Max.   :3.002  
##    dry_weight     after_550_C     after_950_C   
##  Min.   :10.83   Min.   :10.62   Min.   :10.50  
##  1st Qu.:11.33   1st Qu.:11.25   1st Qu.:11.04  
##  Median :11.45   Median :11.39   Median :11.21  
##  Mean   :11.63   Mean   :11.55   Mean   :11.24  
##  3rd Qu.:12.11   3rd Qu.:12.05   3rd Qu.:11.43  
##  Max.   :12.26   Max.   :12.24   Max.   :12.06
summary(tga)
##      Name           Analysis;Date      Batch               Wet           
##  Length:7           Min.   :44536   Length:7           Length:7          
##  Class :character   1st Qu.:44536   Class :character   Class :character  
##  Mode  :character   Median :44536   Mode  :character   Mode  :character  
##                     Mean   :44536                                        
##                     3rd Qu.:44536                                        
##                     Max.   :44536                                        
##      Dry              Mass_550           Mass_950        
##  Length:7           Length:7           Length:7          
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
## 
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(loi$c950)
## $stats
## [1]  1.087679  5.239954  6.463715 14.575647 15.544905
## 
## $n
## [1] 7
## 
## $conf
## [1]  0.8885902 12.0388404
## 
## $out
## [1] 32.40429
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)

# Removing duplicates from the next graph
rownames(loi)
## [1] "AY-43" "AY-44" "AY-45" "AY-46" "AY-47" "AY-48" "AY-49"
loi <- subset(loi[1:47, ])

### ggplots later

barplot(loi$c550)

barplot(loi$c950)

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(tga$c550, tga$c950)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(tga$c950)
## $stats
## [1]  6.813924  8.278065  9.781651 11.887838 12.043832
## 
## $n
## [1] 7
## 
## $conf
## [1]  7.625953 11.937349
## 
## $out
## [1] 29.72412
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])

ggplot(tga, 
      aes(c550, c950, label = rownames(tga))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(tga$c550)

barplot(tga$c950)

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AYB.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :34.56   Min.   : 2.480   Min.   : 6.98  
##  1st Qu.:38.93   1st Qu.: 5.215   1st Qu.:12.73  
##  Median :45.30   Median : 5.540   Median :13.28  
##  Mean   :46.69   Mean   : 6.211   Mean   :14.56  
##  3rd Qu.:50.95   3rd Qu.: 6.675   3rd Qu.:16.21  
##  Max.   :67.20   Max.   :11.680   Max.   :23.75
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Israel

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

pxrf_no_na:

summary(pxrf_no_na)
##                Area             Type        Al2O3               SiO2           
##  Area D Acropolis:19   mud mortar : 1   Min.   :-1.20092   Min.   :-1.3142038  
##  Area D Wall     :22   mud plaster: 5   1st Qu.:-0.49712   1st Qu.:-0.4978527  
##  Builder's floor : 5   mudbrick   :35   Median :-0.23740   Median :-0.0003512  
##  Hellenistic     : 1   PEM        : 6   Mean   : 0.04576   Mean   : 0.0389010  
##                                         3rd Qu.: 0.54615   3rd Qu.: 0.5568590  
##                                         Max.   : 1.88920   Max.   : 1.3770734  
##       K2O                CaO                 Ti                 Mn          
##  Min.   :-1.08731   Min.   :-1.56460   Min.   :-1.17385   Min.   :-1.00032  
##  1st Qu.:-0.54550   1st Qu.:-0.45677   1st Qu.:-0.47636   1st Qu.:-0.50265  
##  Median :-0.02978   Median : 0.07788   Median :-0.04881   Median :-0.13876  
##  Mean   : 0.04621   Mean   : 0.06819   Mean   : 0.04264   Mean   : 0.03671  
##  3rd Qu.: 0.39368   3rd Qu.: 0.64823   3rd Qu.: 0.53925   3rd Qu.: 0.44988  
##  Max.   : 2.35094   Max.   : 1.91455   Max.   : 2.05476   Max.   : 1.93487  
##        Fe                 Cu                 Zn                 Rb          
##  Min.   :-1.10643   Min.   :-1.50957   Min.   :-1.03131   Min.   :-1.28223  
##  1st Qu.:-0.49989   1st Qu.:-0.49095   1st Qu.:-0.61933   1st Qu.:-0.54198  
##  Median :-0.09144   Median : 0.18813   Median :-0.20735   Median :-0.13073  
##  Mean   : 0.04543   Mean   : 0.04846   Mean   : 0.02864   Mean   : 0.04121  
##  3rd Qu.: 0.46449   3rd Qu.: 0.64086   3rd Qu.: 0.29970   3rd Qu.: 0.68149  
##  Max.   : 2.31412   Max.   : 1.88584   Max.   : 2.45465   Max.   : 1.92553  
##        Sr                 Zr          
##  Min.   :-1.85766   Min.   :-1.61460  
##  1st Qu.:-0.41969   1st Qu.:-0.60738  
##  Median : 0.10393   Median : 0.05199  
##  Mean   : 0.07807   Mean   : 0.05398  
##  3rd Qu.: 0.51031   3rd Qu.: 0.55041  
##  Max.   : 2.08896   Max.   : 2.11835

pxrf_all:

summary(pxrf_all)
##                Area             Type         MgO                Al2O3         
##  Area D Acropolis:19   mud mortar : 1   Min.   :-1.238209   Min.   :-1.20092  
##  Area D Wall     :22   mud plaster: 5   1st Qu.:-0.422347   1st Qu.:-0.49712  
##  Builder's floor : 5   mudbrick   :35   Median : 0.004755   Median :-0.23740  
##  Hellenistic     : 1   PEM        : 6   Mean   : 0.038303   Mean   : 0.04576  
##                                         3rd Qu.: 0.429173   3rd Qu.: 0.54615  
##                                         Max.   : 1.756566   Max.   : 1.88920  
##       SiO2                  S                  Cl                K2O          
##  Min.   :-1.3142038   Min.   :-0.23396   Min.   :-0.88004   Min.   :-1.08731  
##  1st Qu.:-0.4978527   1st Qu.: 0.00000   1st Qu.:-0.50859   1st Qu.:-0.54550  
##  Median :-0.0003512   Median : 0.00000   Median :-0.10495   Median :-0.02978  
##  Mean   : 0.0389010   Mean   :-0.01702   Mean   : 0.01735   Mean   : 0.04621  
##  3rd Qu.: 0.5568590   3rd Qu.: 0.00000   3rd Qu.: 0.16497   3rd Qu.: 0.39368  
##  Max.   : 1.3770734   Max.   : 0.00000   Max.   : 3.94139   Max.   : 2.35094  
##       CaO                 Ti                 V                  Mn          
##  Min.   :-1.56460   Min.   :-1.17385   Min.   :-0.98017   Min.   :-1.00032  
##  1st Qu.:-0.45677   1st Qu.:-0.47636   1st Qu.: 0.00000   1st Qu.:-0.50265  
##  Median : 0.07788   Median :-0.04881   Median : 0.00000   Median :-0.13876  
##  Mean   : 0.06819   Mean   : 0.04264   Mean   : 0.03264   Mean   : 0.03671  
##  3rd Qu.: 0.64823   3rd Qu.: 0.53925   3rd Qu.: 0.00000   3rd Qu.: 0.44988  
##  Max.   : 1.91455   Max.   : 2.05476   Max.   : 1.57588   Max.   : 1.93487  
##        Fe                 Ni                 Cu                 Zn          
##  Min.   :-1.10643   Min.   :-1.10296   Min.   :-1.50957   Min.   :-1.03131  
##  1st Qu.:-0.49989   1st Qu.:-0.42043   1st Qu.:-0.49095   1st Qu.:-0.61933  
##  Median :-0.09144   Median :-0.01092   Median : 0.18813   Median :-0.20735  
##  Mean   : 0.04543   Mean   : 0.01400   Mean   : 0.04846   Mean   : 0.02864  
##  3rd Qu.: 0.46449   3rd Qu.: 0.53510   3rd Qu.: 0.64086   3rd Qu.: 0.29970  
##  Max.   : 2.31412   Max.   : 1.71814   Max.   : 1.88584   Max.   : 2.45465  
##        As                Rb                 Sr                 Y           
##  Min.   :-0.8953   Min.   :-1.28223   Min.   :-1.85766   Min.   :-1.16797  
##  1st Qu.: 0.0000   1st Qu.:-0.54198   1st Qu.:-0.41969   1st Qu.:-0.41036  
##  Median : 0.0000   Median :-0.13073   Median : 0.10393   Median : 0.00000  
##  Mean   : 0.1205   Mean   : 0.04121   Mean   : 0.07807   Mean   : 0.04659  
##  3rd Qu.: 0.0000   3rd Qu.: 0.68149   3rd Qu.: 0.51031   3rd Qu.: 0.39182  
##  Max.   : 2.3007   Max.   : 1.92553   Max.   : 2.08896   Max.   : 2.26357  
##        Zr                 Nb         
##  Min.   :-1.61460   Min.   :-0.8931  
##  1st Qu.:-0.60738   1st Qu.: 0.0000  
##  Median : 0.05199   Median : 0.0000  
##  Mean   : 0.05398   Mean   : 0.0232  
##  3rd Qu.: 0.55041   3rd Qu.: 0.0000  
##  Max.   : 2.11835   Max.   : 2.1830

pXRF: Elemental correlations and K means cluster analysis

pXRF: PCA

PCA with only the no-NA:s elements:

PCA with all the feasible elements:

pXRF: HCA

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AY.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

# Libraries
library(dplyr); 
library(GGally); 
library(corrplot); 
library(tidyr); 
library(openxlsx); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
library(ggtern) # ternary plots

Preparing data

loi <- read.xlsx("data/loi_israel.xlsx", sep=";")
tga <- read.xlsx("data/tga_israel.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:78          Min.   :7.921   Min.   :10.13   Min.   :1.538  
##  Class :character   1st Qu.:8.566   1st Qu.:11.18   1st Qu.:2.448  
##  Mode  :character   Median :9.006   Median :11.71   Median :2.635  
##                     Mean   :8.995   Mean   :11.58   Mean   :2.588  
##                     3rd Qu.:9.449   3rd Qu.:12.05   3rd Qu.:2.845  
##                     Max.   :9.925   Max.   :12.68   Max.   :3.231  
##    dry_weight     after_550_C     after_950_C    
##  Min.   :10.11   Min.   :10.08   Min.   : 9.773  
##  1st Qu.:11.17   1st Qu.:11.12   1st Qu.:10.820  
##  Median :11.67   Median :11.62   Median :11.302  
##  Mean   :11.55   Mean   :11.49   Mean   :11.267  
##  3rd Qu.:12.00   3rd Qu.:11.95   3rd Qu.:11.781  
##  Max.   :12.65   Max.   :12.60   Max.   :12.524
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:72          Length:72          Length:72          Length:72         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950        
##  Length:72          Length:72          Length:72         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(loi$c950)
## $stats
## [1]  0.398150  1.912342  3.049426  9.981413 19.321298
## 
## $n
## [1] 78
## 
## $conf
## [1] 1.605872 4.492981
## 
## $out
##  [1] 29.25001 40.50466 33.45907 42.03850 42.60863 29.14914 29.25505 27.79538
##  [9] 33.34575 40.99217 26.13474 41.29933 32.40429
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)

# Removing duplicates from the next graph
rownames(loi)
##  [1] "AH-1"    "AH-2"    "AH-3"    "AH-4"    "AH-5"    "AH-6"    "AH-7"   
##  [8] "AH-8"    "AH-9"    "TI-1"    "TI-2"    "TI-3"    "TI-4"    "TI-5"   
## [15] "TI-6"    "TI-7"    "TI-8"    "TI-10"   "RLZ-1"   "AY-43"   "AY-44"  
## [22] "AY-45"   "AY-46"   "AY-47"   "AY-48"   "AY-49"   "AY-1"    "AY-2"   
## [29] "AY-3"    "AY-4"    "AY-5"    "AY-6"    "AY-7"    "AY-8"    "AY-9"   
## [36] "AY-10"   "AY-11"   "AY-12"   "AY-13"   "AY-14"   "AY-15"   "AY-16"  
## [43] "AY-17"   "AY-18"   "AY-19"   "AY-20"   "AY-21"   "AY-22"   "AY-23"  
## [50] "AY-24"   "AY-25"   "AY-26"   "AY-27"   "AY-28"   "AY-29"   "AY-30"  
## [57] "AY-31"   "AY-32"   "AY-33"   "AY-34"   "AY-35"   "AY-36"   "AY-37"  
## [64] "AY-38"   "AY-39"   "AY-40"   "AY-41"   "AY-42"   "AY-50"   "AY-51"  
## [71] "AY-52"   "AY-53"   "AY-54"   "AY-50_2" "AY-51_2" "AY-52_2" "AY-53_2"
## [78] "AY-54_2"
loi <- subset(loi[1:72, ])

### add ggplots later with context and type

barplot(loi$c550)

barplot(loi$c950)

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(tga$c550, tga$c950)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(tga$c950)
## $stats
## [1] 0.7233976 1.6976633 3.2631063 5.4590326 9.7816508
## 
## $n
## [1] 69
## 
## $conf
## [1] 2.547658 3.978555
## 
## $out
##  [1] 12.04383 11.73184 29.72412 42.03111 42.69172 30.94577 30.11137 28.19411
##  [9] 34.00000 41.45557 29.66118 41.94993
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])

ggplot(tga, 
      aes(c550, c950, label = rownames(tga))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(tga$c550)

barplot(tga$c950)

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_israel.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :43.75   Min.   : 3.160   Min.   :13.85  
##  1st Qu.:56.08   1st Qu.: 3.770   1st Qu.:14.71  
##  Median :64.30   Median : 4.350   Median :15.85  
##  Mean   :63.19   Mean   : 6.649   Mean   :17.27  
##  3rd Qu.:71.51   3rd Qu.: 6.280   3rd Qu.:17.70  
##  Max.   :75.34   Max.   :19.530   Max.   :25.84
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Palaepaphos

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(dplyr); 
library(GGally); 
library(corrplot); 
library(tidyr); 
library(openxlsx); 
library(cluster); 
library(ggplot2); 
library(ggfortify); 
library(pca3d);
library(FactoMineR); # fast PCA graphs
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
library(tidyverse) # rectangles around HClust

# Read data
pxrf <- read.xlsx("data/pxrf_PP.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Turn any "LOD":s and missing values to "NA":s to allow scaling
values <- c("MgO", "Al2O3", "SiO2", 
            "S", "Cl", "K2O", "CaO", "Ti", "V", "Cr", 
            "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As", 
            "Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", 
            "Rh", "Pd", "Ag", "Cd", "Sn", "Sb", 
            "La", "Ce", "Hf", "Ta", "W", "Pt", "Au", 
            "Hg", "Tl", "Pb", "Bi", "Th", "U")

pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)

# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(pxrf_NA, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"

# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)

# Removing columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)

# Removing columns if they contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)

# Datasets with only our new mudbrick samples (no soil or previously published ones)
MB_nona <- pxrf_no_na[c(1:11), ]
MB_all <- pxrf_all[c(1:11), ]

# Removing As, Nb, Rh, Ag and Pb as they have almost no viable measurement values
MB_all <- MB_all %>% 
        select(-As) %>% 
        select(-Nb) %>% 
        select(-Rh) %>%   
        select(-Ag) %>% 
        select(-Pb) 

pxrf_no_na:

summary(pxrf_no_na)
##    Area          Type          Ti               Mn                Fe        
##  new :11   mudbrick:45   Min.   :0.0969   Min.   :0.01190   Min.   :0.9617  
##  old :34   soil    : 8   1st Qu.:0.1814   1st Qu.:0.03893   1st Qu.:2.2241  
##  soil: 8                 Median :0.2118   Median :0.04670   Median :2.7214  
##                          Mean   :0.2180   Mean   :0.04549   Mean   :2.8091  
##                          3rd Qu.:0.2594   3rd Qu.:0.05440   3rd Qu.:3.3530  
##                          Max.   :0.3571   Max.   :0.06800   Max.   :4.7754  
##        Ni                 Cu                 Zr          
##  Min.   :0.000400   Min.   :0.003600   Min.   :0.004300  
##  1st Qu.:0.002200   1st Qu.:0.005933   1st Qu.:0.007800  
##  Median :0.003000   Median :0.006500   Median :0.008800  
##  Mean   :0.002856   Mean   :0.006761   Mean   :0.008810  
##  3rd Qu.:0.003500   3rd Qu.:0.007400   3rd Qu.:0.009667  
##  Max.   :0.005900   Max.   :0.011333   Max.   :0.012200

pxrf_all:

summary(pxrf_all)
##    Area          Type         MgO            Al2O3            SiO2       
##  new :11   mudbrick:45   Min.   :2.919   Min.   :2.182   Min.   : 9.164  
##  old :34   soil    : 8   1st Qu.:3.369   1st Qu.:3.403   1st Qu.:14.598  
##  soil: 8                 Median :4.088   Median :4.317   Median :17.087  
##                          Mean   :4.011   Mean   :4.168   Mean   :16.502  
##                          3rd Qu.:4.499   3rd Qu.:4.702   3rd Qu.:18.560  
##                          Max.   :5.125   Max.   :5.902   Max.   :22.259  
##                          NA's   :34      NA's   :34      NA's   :34      
##        S                 Cl               K2O              CaO       
##  Min.   :0.04373   Min.   :0.01113   Min.   :0.1980   Min.   :11.28  
##  1st Qu.:0.10686   1st Qu.:0.02597   1st Qu.:0.3599   1st Qu.:14.53  
##  Median :0.21532   Median :0.05117   Median :0.4238   Median :18.05  
##  Mean   :0.25538   Mean   :0.13135   Mean   :0.4340   Mean   :17.94  
##  3rd Qu.:0.32239   3rd Qu.:0.10892   3rd Qu.:0.4795   3rd Qu.:20.82  
##  Max.   :0.74597   Max.   :0.93440   Max.   :0.6584   Max.   :25.59  
##  NA's   :35        NA's   :34        NA's   :36       NA's   :34     
##        Ti               V                 Mn                Fe        
##  Min.   :0.0969   Min.   :0.00267   Min.   :0.01190   Min.   :0.9617  
##  1st Qu.:0.1814   1st Qu.:0.00334   1st Qu.:0.03893   1st Qu.:2.2241  
##  Median :0.2118   Median :0.00420   Median :0.04670   Median :2.7214  
##  Mean   :0.2180   Mean   :0.00425   Mean   :0.04549   Mean   :2.8091  
##  3rd Qu.:0.2594   3rd Qu.:0.00512   3rd Qu.:0.05440   3rd Qu.:3.3530  
##  Max.   :0.3571   Max.   :0.00587   Max.   :0.06800   Max.   :4.7754  
##                   NA's   :41                                          
##        Ni                 Cu                 Zn                As         
##  Min.   :0.000400   Min.   :0.003600   Min.   :0.00167   Min.   :0.00033  
##  1st Qu.:0.002200   1st Qu.:0.005933   1st Qu.:0.00308   1st Qu.:0.00033  
##  Median :0.003000   Median :0.006500   Median :0.00337   Median :0.00040  
##  Mean   :0.002856   Mean   :0.006761   Mean   :0.00335   Mean   :0.00040  
##  3rd Qu.:0.003500   3rd Qu.:0.007400   3rd Qu.:0.00382   3rd Qu.:0.00047  
##  Max.   :0.005900   Max.   :0.011333   Max.   :0.00500   Max.   :0.00047  
##                                        NA's   :34        NA's   :48       
##        Rb                Sr                Y                 Zr          
##  Min.   :0.00120   Min.   :0.01790   Min.   :0.00087   Min.   :0.004300  
##  1st Qu.:0.00173   1st Qu.:0.02893   1st Qu.:0.00137   1st Qu.:0.007800  
##  Median :0.00203   Median :0.03403   Median :0.00160   Median :0.008800  
##  Mean   :0.00204   Mean   :0.03402   Mean   :0.00161   Mean   :0.008810  
##  3rd Qu.:0.00213   3rd Qu.:0.03835   3rd Qu.:0.00173   3rd Qu.:0.009667  
##  Max.   :0.00357   Max.   :0.04840   Max.   :0.00260   Max.   :0.012200  
##  NA's   :35        NA's   :34        NA's   :36                          
##        Nb                Rh                Ag                Pb       
##  Min.   :0.00063   Min.   :0.01570   Min.   :0.00230   Min.   :0.002  
##  1st Qu.:0.00063   1st Qu.:0.01683   1st Qu.:0.00282   1st Qu.:0.002  
##  Median :0.00063   Median :0.01797   Median :0.00322   Median :0.002  
##  Mean   :0.00063   Mean   :0.01797   Mean   :0.00330   Mean   :0.002  
##  3rd Qu.:0.00063   3rd Qu.:0.01910   3rd Qu.:0.00369   3rd Qu.:0.002  
##  Max.   :0.00063   Max.   :0.02023   Max.   :0.00447   Max.   :0.002  
##  NA's   :52        NA's   :51        NA's   :49        NA's   :52

MB_nona:

summary(MB_nona)
##    Area          Type          Ti               Mn                Fe        
##  new :11   mudbrick:11   Min.   :0.0969   Min.   :0.01190   Min.   :0.9617  
##  old : 0   soil    : 0   1st Qu.:0.1663   1st Qu.:0.02397   1st Qu.:1.6270  
##  soil: 0                 Median :0.1915   Median :0.03347   Median :2.1312  
##                          Mean   :0.1780   Mean   :0.03070   Mean   :1.8912  
##                          3rd Qu.:0.1941   3rd Qu.:0.03895   3rd Qu.:2.2622  
##                          Max.   :0.2295   Max.   :0.04177   Max.   :2.4481  
##        Ni                 Cu                 Zr          
##  Min.   :0.001400   Min.   :0.004500   Min.   :0.005400  
##  1st Qu.:0.002317   1st Qu.:0.005533   1st Qu.:0.007767  
##  Median :0.003000   Median :0.006700   Median :0.008733  
##  Mean   :0.002812   Mean   :0.007333   Mean   :0.008494  
##  3rd Qu.:0.003367   3rd Qu.:0.008717   3rd Qu.:0.009100  
##  Max.   :0.003967   Max.   :0.011333   Max.   :0.011833

MB_all:

summary(MB_all)
##    Area          Type         MgO            Al2O3            SiO2       
##  new :11   mudbrick:11   Min.   :2.919   Min.   :2.182   Min.   : 9.164  
##  old : 0   soil    : 0   1st Qu.:3.531   1st Qu.:3.403   1st Qu.:13.366  
##  soil: 0                 Median :4.088   Median :4.317   Median :17.705  
##                          Mean   :3.999   Mean   :3.941   Mean   :15.874  
##                          3rd Qu.:4.400   3rd Qu.:4.593   3rd Qu.:18.560  
##                          Max.   :5.125   Max.   :5.270   Max.   :20.334  
##                                                                          
##        S                Cl               K2O              CaO       
##  Min.   :0.1622   Min.   :0.02723   Min.   :0.1980   Min.   :11.28  
##  1st Qu.:0.2737   1st Qu.:0.06783   1st Qu.:0.3920   1st Qu.:18.46  
##  Median :0.3221   Median :0.09397   Median :0.4376   Median :20.74  
##  Mean   :0.3550   Mean   :0.20759   Mean   :0.4312   Mean   :19.67  
##  3rd Qu.:0.3809   3rd Qu.:0.23782   3rd Qu.:0.4757   3rd Qu.:22.12  
##  Max.   :0.7460   Max.   :0.93440   Max.   :0.6052   Max.   :25.59  
##                                     NA's   :1                       
##        Ti               V                  Mn                Fe        
##  Min.   :0.0969   Min.   :0.002667   Min.   :0.01190   Min.   :0.9617  
##  1st Qu.:0.1663   1st Qu.:0.004317   1st Qu.:0.02397   1st Qu.:1.6270  
##  Median :0.1915   Median :0.004750   Median :0.03347   Median :2.1312  
##  Mean   :0.1780   Mean   :0.004721   Mean   :0.03070   Mean   :1.8912  
##  3rd Qu.:0.1941   3rd Qu.:0.005633   3rd Qu.:0.03895   3rd Qu.:2.2622  
##  Max.   :0.2295   Max.   :0.005867   Max.   :0.04177   Max.   :2.4481  
##                   NA's   :3                                            
##        Ni                 Cu                 Zn                 Rb          
##  Min.   :0.001400   Min.   :0.004500   Min.   :0.001667   Min.   :0.001200  
##  1st Qu.:0.002317   1st Qu.:0.005533   1st Qu.:0.002467   1st Qu.:0.001617  
##  Median :0.003000   Median :0.006700   Median :0.003233   Median :0.001767  
##  Mean   :0.002812   Mean   :0.007333   Mean   :0.003170   Mean   :0.001750  
##  3rd Qu.:0.003367   3rd Qu.:0.008717   3rd Qu.:0.003800   3rd Qu.:0.001925  
##  Max.   :0.003967   Max.   :0.011333   Max.   :0.005000   Max.   :0.002133  
##                                                           NA's   :1         
##        Sr                Y                   Zr          
##  Min.   :0.01790   Min.   :0.0008667   Min.   :0.005400  
##  1st Qu.:0.02893   1st Qu.:0.0013333   1st Qu.:0.007767  
##  Median :0.03507   Median :0.0014000   Median :0.008733  
##  Mean   :0.03438   Mean   :0.0013741   Mean   :0.008494  
##  3rd Qu.:0.03835   3rd Qu.:0.0014667   3rd Qu.:0.009100  
##  Max.   :0.04840   Max.   :0.0017333   Max.   :0.011833  
##                    NA's   :2

pXRF: Elemental correlations and K means cluster analysis

Only the “new” mudbrick samples

# Visualize correlations between elements
cor(MB_all[3:20]) %>% corrplot (type = "lower", tl.cex = 1, tl.pos="d", cl.pos="r")

# Optimal number of clusters in K-means analysis: 2
k_max <- 8
twcss <- sapply(1:k_max, function(k){kmeans(MB_nona[3:8], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')

# K-means clustering in ggpairs: 2 clusters
km <-kmeans(MB_nona[3:8], centers = 2)
cluster_x <- as.factor(km$cluster)
ggpairs(MB_nona[3:8], mapping = aes(col = cluster_x))

pXRF: PCA

PCA with only the no-NA:s elements:

# PCA for only non-NA elements
colnames(MB_nona)
## [1] "Area" "Type" "Ti"   "Mn"   "Fe"   "Ni"   "Cu"   "Zr"
pca_1 <- prcomp(MB_nona[3:8], scale = TRUE, center = TRUE)
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6
## Standard deviation     2.0136 1.0789 0.70395 0.45806 0.25323 0.10779
## Proportion of Variance 0.6758 0.1940 0.08259 0.03497 0.01069 0.00194
## Cumulative Proportion  0.6758 0.8698 0.95241 0.98738 0.99806 1.00000
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"))

autoplot(pca_1, data=MB_nona, colour="red", shape = FALSE, label = TRUE,  main = "PCA Palaepaphos 1: no NA:s at all")

PCA(MB_nona[3:8])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 11 individuals, described by 6 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

PCA with all the feasible elements:

# NA:s converted to zeros
MB_all[is.na(MB_all)] <- 0

colnames(MB_all)
##  [1] "Area"  "Type"  "MgO"   "Al2O3" "SiO2"  "S"     "Cl"    "K2O"   "CaO"  
## [10] "Ti"    "V"     "Mn"    "Fe"    "Ni"    "Cu"    "Zn"    "Rb"    "Sr"   
## [19] "Y"     "Zr"
pca_2 <- prcomp((MB_all[3:20]), scale = TRUE, center = TRUE)
summary(pca_2)
## Importance of components:
##                           PC1    PC2     PC3     PC4    PC5     PC6    PC7
## Standard deviation     3.1620 1.8251 1.26077 1.10902 0.8950 0.63845 0.5612
## Proportion of Variance 0.5554 0.1851 0.08831 0.06833 0.0445 0.02265 0.0175
## Cumulative Proportion  0.5554 0.7405 0.82882 0.89715 0.9416 0.96429 0.9818
##                           PC8     PC9    PC10      PC11
## Standard deviation     0.4388 0.34174 0.13583 2.812e-16
## Proportion of Variance 0.0107 0.00649 0.00103 0.000e+00
## Cumulative Proportion  0.9925 0.99897 1.00000 1.000e+00
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"))

autoplot(pca_2, data=MB_all, colour="red", shape = FALSE, label = TRUE,  main = "PCA Palaepaphos 2: NA:s permitted")

PCA(MB_all[3:20])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 11 individuals, described by 18 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

PCA with all the Mudbrick and soil samples

Including the previously published ones, No NA values permitted

# PCA for only non-NA elements
colnames(pxrf_no_na)
## [1] "Area" "Type" "Ti"   "Mn"   "Fe"   "Ni"   "Cu"   "Zr"
pca_3 <- prcomp(na.omit(pxrf_no_na[3:6]), scale = TRUE, center = TRUE)
summary(pca_3)
## Importance of components:
##                           PC1    PC2     PC3     PC4
## Standard deviation     1.6514 0.9995 0.41614 0.31724
## Proportion of Variance 0.6818 0.2498 0.04329 0.02516
## Cumulative Proportion  0.6818 0.9315 0.97484 1.00000
biplot(pca_3, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"))

autoplot(pca_3, data=pxrf_no_na, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

autoplot(pca_3, data=pxrf_no_na, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by sample type")

PCA with all the Mudbrick samples

Including the previously published ones, NA values are permitted This is not viable, as the difference in what values are available is too different between old and new samples.

# NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0

colnames(pxrf_all)
##  [1] "Area"  "Type"  "MgO"   "Al2O3" "SiO2"  "S"     "Cl"    "K2O"   "CaO"  
## [10] "Ti"    "V"     "Mn"    "Fe"    "Ni"    "Cu"    "Zn"    "As"    "Rb"   
## [19] "Sr"    "Y"     "Zr"    "Nb"    "Rh"    "Ag"    "Pb"
pca_4 <- prcomp(pxrf_all[3:25], scale = TRUE, center = TRUE)
summary(pca_4)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     3.3748 1.8765 1.50629 1.13013 1.04455 0.89816 0.83735
## Proportion of Variance 0.4952 0.1531 0.09865 0.05553 0.04744 0.03507 0.03049
## Cumulative Proportion  0.4952 0.6483 0.74692 0.80245 0.84989 0.88497 0.91545
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.77794 0.66209 0.53427 0.40495 0.35837 0.33354 0.27679
## Proportion of Variance 0.02631 0.01906 0.01241 0.00713 0.00558 0.00484 0.00333
## Cumulative Proportion  0.94176 0.96082 0.97323 0.98036 0.98595 0.99078 0.99412
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     0.22788 0.17584 0.14605 0.12032 0.10296 0.06388 0.04325
## Proportion of Variance 0.00226 0.00134 0.00093 0.00063 0.00046 0.00018 0.00008
## Cumulative Proportion  0.99637 0.99772 0.99865 0.99927 0.99974 0.99991 0.99999
##                           PC22      PC23
## Standard deviation     0.01157 2.376e-16
## Proportion of Variance 0.00001 0.000e+00
## Cumulative Proportion  1.00000 1.000e+00
biplot(pca_4, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"))

autoplot(pca_4, data=pxrf_all, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

autoplot(pca_4, data=pxrf_all, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by sample type")

pXRF: HCA

HCA including only new mudbrick samples:

# HCA dendrogam 1
dend <- 
    MB_all[3:20] %>%           # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    set("branches_k_color", k=5) %>% 
    set("branches_lwd", 1.2) %>%
    set("labels_col", c("red", "black", "blue"), k=5) %>%    
    set("labels_cex", 1) %>% 
    set("leaves_pch", NA) 
# plot the dend in usual "base" plotting engine:
plot(dend)

# HCA dendrogam 2
HCA.ward5 <- hclust(dist(MB_all[3:20], method="euclid"), method="ward.D2")
plot(HCA.ward5, main="Ward's method")
rect.hclust(HCA.ward5, k=5)

# Divisive hierarchical clustering (DIANA) just for comparison
hc4 <- diana(MB_all[3:20])
pltree(hc4, cex = 1, hang = -1, main = "Dendrogram of diana")

# Divise coefficient; amount of clustering structure found
hc4$dc
## [1] 0.7894269

HCA including all mudbrick samples, only no NA:s elements :

# HCA dendrogam 1
dend <- 
    pxrf_no_na[1:45, 3:8] %>%           # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    set("branches_k_color", k=5) %>% 
    set("branches_lwd", 1.2) %>%
    set("labels_col", c("red", "black", "blue"), k=5) %>%    
    set("labels_cex", 1) %>% 
    set("leaves_pch", NA) 
# plot the dend in usual "base" plotting engine:
plot(dend)

# HCA dendrogam 2
HCA.ward5 <- hclust(dist(pxrf_no_na[1:45, 3:8], method="euclid"), method="ward.D2")
plot(HCA.ward5, main="Ward's method")
rect.hclust(HCA.ward5, k=5)

# Divisive hierarchical clustering (DIANA) just for comparison
hc4 <- diana(pxrf_no_na[3:8])
pltree(hc4, cex = 1, hang = -1, main = "Dendrogram of diana")

# Divise coefficient; amount of clustering structure found
hc4$dc
## [1] 0.9804178

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_PP.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Creating subsets for only mudbricks
MB_grain <- grain_01[c(1:11),]
MB_context <- grain[c(1:11),]

# Ternary plots
ggtern(data=MB_grain, aes(x=Clay, y=Sand, z=Silt, color=MB_context$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(MB_grain)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

# Libraries
library(dplyr); 
library(GGally); 
library(corrplot); 
library(tidyr); 
library(openxlsx); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
library(ggtern) # ternary plots

Loss on ignition

Preparing data

loi <- read.xlsx("data/loi_PP.xlsx", sep=";")
tga <- read.xlsx("data/tga_PP.xlsx", sep=";")

summary(loi)
##      type              sample             crucible       wet;weight    
##  Length:27          Length:27          Min.   :8.100   Min.   : 9.637  
##  Class :character   Class :character   1st Qu.:8.639   1st Qu.:10.859  
##  Mode  :character   Mode  :character   Median :8.997   Median :11.184  
##                                        Mean   :8.988   Mean   :11.274  
##                                        3rd Qu.:9.454   3rd Qu.:11.844  
##                                        Max.   :9.560   Max.   :12.301  
##    sample;wet      dry_weight      after_550_C      after_950_C   
##  Min.   :1.537   Min.   : 9.595   Min.   : 9.484   Min.   : 9.23  
##  1st Qu.:2.193   1st Qu.:10.813   1st Qu.:10.721   1st Qu.:10.27  
##  Median :2.371   Median :11.132   Median :11.019   Median :10.59  
##  Mean   :2.286   Mean   :11.216   Mean   :11.113   Mean   :10.67  
##  3rd Qu.:2.453   3rd Qu.:11.777   3rd Qu.:11.681   3rd Qu.:11.18  
##  Max.   :2.785   Max.   :12.218   Max.   :12.076   Max.   :11.56
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:30          Length:30          Length:30          Length:30         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950        
##  Length:30          Length:30          Length:30         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(loi$c950)
## $stats
## [1] 14.55842 18.28992 20.42575 21.68496 25.15246
## 
## $n
## [1] 27
## 
## $conf
## [1] 19.39341 21.45808
## 
## $out
## [1] 33.21076 33.78112
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)

# Removing duplicates from the next graph
rownames(loi)
##  [1] "PP-1"   "PP-2"   "PP-3"   "PP-4"   "PP-5"   "PP-6"   "PP-7"   "PP-8"  
##  [9] "PP-9"   "PP-10"  "PP-11"  "PP-12"  "PP-13"  "PP-14"  "PP-15"  "PP-16" 
## [17] "PP-17"  "PP-18"  "PP-19"  "PP-1_2" "PP-2_2" "PP-3_2" "PP-4_2" "PP-5_2"
## [25] "PP-6_2" "PP-7_2" "PP-8_2"
loi <- subset(loi[1:19, ])

ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(type))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(loi$c550)

barplot(loi$c950)

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(tga$c550, tga$c950)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(tga$c950)
## $stats
## [1] 14.54839 18.93682 20.82648 22.49982 24.52314
## 
## $n
## [1] 28
## 
## $conf
## [1] 19.76259 21.89036
## 
## $out
## [1] 31.05334 33.79251
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])

ggplot(tga, 
      aes(c550, c950, label = rownames(tga))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(tga$c550)

barplot(tga$c950)

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_PP.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :62.38   Min.   : 7.27   Min.   :19.90  
##  1st Qu.:63.47   1st Qu.: 8.50   1st Qu.:21.36  
##  Median :64.19   Median : 9.34   Median :21.72  
##  Mean   :65.50   Mean   : 9.29   Mean   :21.92  
##  3rd Qu.:65.78   3rd Qu.:10.56   3rd Qu.:22.96  
##  Max.   :72.00   Max.   :10.90   Max.   :23.90
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Artaxata

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);

library(GGally); 
library(corrplot); 
library(tidyr); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AA.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Turn any "LOD":s and missing values to "NA":s to allow scaling
# Removing "Na", S", "Cl", "P" and "Ba" already
values <- c("MgO", "Al2O3", "SiO2", 
            "K2O", "CaO", "Ti", "V", "Cr", 
            "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As", 
            "Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", 
            "Rh", "Pd", "Ag", "Cd", "Sn", "Sb", 
            "La", "Ce", "Hf", "Ta", "W", "Pt", "Au", 
            "Hg", "Tl", "Pb", "Bi", "Th", "U")

pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)

# Scaling the data with standard z-score method
scaled_pxrf <- as.data.frame(scale(pxrf_NA))

# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(scaled_pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"

# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)

# Removing columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)

# Removing columns if they contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)

pxrf_no_na:

summary(pxrf_no_na)
##               Area         Type         MgO              Al2O3         
##  Hill II        :3   mudbrick:10   Min.   :-0.7957   Min.   :-1.47906  
##  Phase I        :2                 1st Qu.:-0.4773   1st Qu.:-0.41015  
##  Phase II       :2                 Median :-0.2741   Median : 0.05631  
##  Phase II or III:2                 Mean   : 0.0000   Mean   : 0.00000  
##  Urartian silo  :1                 3rd Qu.: 0.3559   3rd Qu.: 0.45276  
##                                    Max.   : 1.9131   Max.   : 1.38944  
##       SiO2              K2O                CaO                   Ti          
##  Min.   :-1.6654   Min.   :-1.55669   Min.   :-1.4333259   Min.   :-1.87044  
##  1st Qu.:-0.3827   1st Qu.:-0.68787   1st Qu.:-0.6680205   1st Qu.:-0.31750  
##  Median : 0.3100   Median : 0.03848   Median :-0.0001026   Median : 0.02437  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000000   Mean   : 0.00000  
##  3rd Qu.: 0.6156   3rd Qu.: 0.75515   3rd Qu.: 0.7641718   3rd Qu.: 0.81009  
##  Max.   : 0.8556   Max.   : 1.11418   Max.   : 1.2091486   Max.   : 0.86883  
##        Mn                 Fe                 Ni                 Cu         
##  Min.   :-1.03797   Min.   :-0.73814   Min.   :-0.94570   Min.   :-1.2582  
##  1st Qu.:-0.30897   1st Qu.:-0.28874   1st Qu.:-0.66424   1st Qu.:-0.5607  
##  Median :-0.09355   Median :-0.08696   Median :-0.07318   Median : 0.1094  
##  Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.: 0.21238   3rd Qu.: 0.08823   3rd Qu.: 0.45456   3rd Qu.: 0.3966  
##  Max.   : 1.57654   Max.   : 1.60860   Max.   : 1.50299   Max.   : 1.5865  
##        Zn                Rb                Sr                Y          
##  Min.   :-0.8847   Min.   :-1.2036   Min.   :-1.4059   Min.   :-1.0149  
##  1st Qu.:-0.3508   1st Qu.:-0.4637   1st Qu.:-0.3803   1st Qu.:-0.3542  
##  Median :-0.0839   Median :-0.2006   Median :-0.1555   Median :-0.1163  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.2720   3rd Qu.: 0.3256   3rd Qu.: 0.4915   3rd Qu.: 0.1216  
##  Max.   : 1.5813   Max.   : 1.4601   Max.   : 1.4314   Max.   : 1.7338  
##        Zr         
##  Min.   :-1.1158  
##  1st Qu.:-0.5166  
##  Median :-0.3001  
##  Mean   : 0.0000  
##  3rd Qu.: 0.1279  
##  Max.   : 1.9054

pxrf_all:

summary(pxrf_all)
##               Area         Type         MgO              Al2O3         
##  Hill II        :3   mudbrick:10   Min.   :-0.7957   Min.   :-1.47906  
##  Phase I        :2                 1st Qu.:-0.4773   1st Qu.:-0.41015  
##  Phase II       :2                 Median :-0.2741   Median : 0.05631  
##  Phase II or III:2                 Mean   : 0.0000   Mean   : 0.00000  
##  Urartian silo  :1                 3rd Qu.: 0.3559   3rd Qu.: 0.45276  
##                                    Max.   : 1.9131   Max.   : 1.38944  
##                                                                        
##       SiO2              K2O                CaO                   Ti          
##  Min.   :-1.6654   Min.   :-1.55669   Min.   :-1.4333259   Min.   :-1.87044  
##  1st Qu.:-0.3827   1st Qu.:-0.68787   1st Qu.:-0.6680205   1st Qu.:-0.31750  
##  Median : 0.3100   Median : 0.03848   Median :-0.0001026   Median : 0.02437  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000000   Mean   : 0.00000  
##  3rd Qu.: 0.6156   3rd Qu.: 0.75515   3rd Qu.: 0.7641718   3rd Qu.: 0.81009  
##  Max.   : 0.8556   Max.   : 1.11418   Max.   : 1.2091486   Max.   : 0.86883  
##                                                                              
##        V                  Cr                  Mn                 Fe          
##  Min.   :-0.46003   Min.   :-0.686179   Min.   :-1.03797   Min.   :-0.73814  
##  1st Qu.:-0.43566   1st Qu.:-0.459188   1st Qu.:-0.30897   1st Qu.:-0.28874  
##  Median :-0.19193   Median :-0.232198   Median :-0.09355   Median :-0.08696  
##  Mean   :-0.09932   Mean   :-0.232198   Mean   : 0.00000   Mean   : 0.00000  
##  3rd Qu.: 0.02742   3rd Qu.:-0.005208   3rd Qu.: 0.21238   3rd Qu.: 0.08823  
##  Max.   : 0.56361   Max.   : 0.221783   Max.   : 1.57654   Max.   : 1.60860  
##  NA's   :5          NA's   :8                                                
##        Ni                 Cu                Zn                As          
##  Min.   :-0.94570   Min.   :-1.2582   Min.   :-0.8847   Min.   :-1.03932  
##  1st Qu.:-0.66424   1st Qu.:-0.5607   1st Qu.:-0.3508   1st Qu.:-0.21581  
##  Median :-0.07318   Median : 0.1094   Median :-0.0839   Median :-0.05111  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.05869  
##  3rd Qu.: 0.45456   3rd Qu.: 0.3966   3rd Qu.: 0.2720   3rd Qu.: 0.44299  
##  Max.   : 1.50299   Max.   : 1.5865   Max.   : 1.5813   Max.   : 1.10179  
##                                                         NA's   :1         
##        Rb                Sr                Y                 Zr         
##  Min.   :-1.2036   Min.   :-1.4059   Min.   :-1.0149   Min.   :-1.1158  
##  1st Qu.:-0.4637   1st Qu.:-0.3803   1st Qu.:-0.3542   1st Qu.:-0.5166  
##  Median :-0.2006   Median :-0.1555   Median :-0.1163   Median :-0.3001  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.3256   3rd Qu.: 0.4915   3rd Qu.: 0.1216   3rd Qu.: 0.1279  
##  Max.   : 1.4601   Max.   : 1.4314   Max.   : 1.7338   Max.   : 1.9054  
##                                                                         
##        Nb          
##  Min.   :-0.65796  
##  1st Qu.:-0.50862  
##  Median :-0.06060  
##  Mean   :-0.09379  
##  3rd Qu.:-0.06060  
##  Max.   : 0.93500  
##  NA's   :4

pXRF: Elemental correlations and K means cluster analysis

# Drop the more dubious elements
pxrf_1 <- pxrf_no_na %>% select(-Al2O3)
pxrf_1 <- pxrf_1 %>% select(-CaO)

# Visualize correlations between elements
cor(pxrf_1[3:15]) %>% corrplot (type = "lower", tl.cex = 1, tl.pos="d", cl.pos="r")

# Optimal number of clusters in K-means analysis: 2
k_max <- 8
twcss <- sapply(1:k_max, function(k){kmeans(pxrf_1[3:12], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')

# K-means clustering in ggpairs: 2 clusters
km <-kmeans(pxrf_1[3:12], centers = 2)
cluster_x <- as.factor(km$cluster)
ggpairs(pxrf_1, mapping = aes(col = cluster_x))

pXRF: PCA

PCA with only the no-NA:s elements:

# PCA
pca_1 <- prcomp(pxrf_1[3:15])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.4147 1.0889 0.9306 0.48309 0.37987 0.31662 0.24466
## Proportion of Variance 0.6901 0.1403 0.1025 0.02762 0.01708 0.01186 0.00708
## Cumulative Proportion  0.6901 0.8305 0.9330 0.96059 0.97767 0.98953 0.99662
##                            PC8     PC9      PC10
## Standard deviation     0.15630 0.06455 7.266e-17
## Proportion of Variance 0.00289 0.00049 0.000e+00
## Cumulative Proportion  0.99951 1.00000 1.000e+00
# PCA1 plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA limited elements")

autoplot(pca_1, data=pxrf_1, colour='Area', shape = FALSE, label = TRUE,  main = "PCA grouped by area")

PCA with all the feasible elements:

# PCA with all elements, NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0

pca_2 <- prcomp(pxrf_all[3:21])
summary(pca_2)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.6421 1.2026 0.98838 0.84185 0.45012 0.39002 0.30457
## Proportion of Variance 0.6529 0.1353 0.09136 0.06628 0.01895 0.01423 0.00868
## Cumulative Proportion  0.6529 0.7881 0.87946 0.94574 0.96469 0.97891 0.98759
##                            PC8     PC9      PC10
## Standard deviation     0.28180 0.23084 2.238e-16
## Proportion of Variance 0.00743 0.00498 0.000e+00
## Cumulative Proportion  0.99502 1.00000 1.000e+00
# PCA2 plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA more elements")

autoplot(pca_2, data=pxrf_all, colour='Area', shape = FALSE, label = TRUE,  main = "PCA more elements grouped by area")

pXRF: HCA

# HCA

# HCA dendrogam 1
dend <- 
    pxrf_1[3:12] %>%                    # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    set("branches_k_color", k=4) %>% 
    set("branches_lwd", 1.2) %>%
    set("labels_col", c("red", "black", "blue"), k=4) %>%    
    set("labels_cex", 1) %>% 
    set("leaves_pch", NA) 
# plot the dend in usual "base" plotting engine:
plot(dend)

# HCA dendrogam 2: 
HCA.ward4 <- hclust(dist(pxrf_1[3:12], method="euclid"), method="ward.D2")
plot(HCA.ward4, main="HCA with clusters")
rect.hclust(HCA.ward4, k=4)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AA.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

# Libraries
library(dplyr); 
library(GGally); 
library(corrplot); 
library(tidyr); 
library(openxlsx); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
library(ggtern) # ternary plots

Preparing data

loi <- read.xlsx("data/loi_AA.xlsx", sep=";")
tga <- read.xlsx("data/tga_AA.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:10          Min.   :8.066   Min.   :10.70   Min.   :1.923  
##  Class :character   1st Qu.:8.461   1st Qu.:10.93   1st Qu.:2.326  
##  Mode  :character   Median :8.995   Median :11.48   Median :2.558  
##                     Mean   :8.892   Mean   :11.39   Mean   :2.496  
##                     3rd Qu.:9.174   3rd Qu.:11.75   3rd Qu.:2.701  
##                     Max.   :9.890   Max.   :12.18   Max.   :2.842  
##    dry_weight     after_550_C     after_950_C   
##  Min.   :10.58   Min.   :10.47   Min.   :10.31  
##  1st Qu.:10.85   1st Qu.:10.78   1st Qu.:10.60  
##  Median :11.43   Median :11.33   Median :11.16  
##  Mean   :11.32   Mean   :11.22   Mean   :11.07  
##  3rd Qu.:11.69   3rd Qu.:11.58   3rd Qu.:11.42  
##  Max.   :12.13   Max.   :12.06   Max.   :11.94
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:12          Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950        
##  Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(loi$c950)
## $stats
## [1] 5.507166 5.756990 6.617026 7.738517 7.965070
## 
## $n
## [1] 10
## 
## $conf
## [1] 5.626976 7.607076
## 
## $out
## numeric(0)
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)

# Removing duplicates from the next graph
rownames(loi)
##  [1] "AA-1"  "AA-2"  "AA-3"  "AA-4"  "AA-5"  "AA-6"  "AA-7"  "AA-8"  "AA-9" 
## [10] "AA-10"
loi <- subset(loi[1:47, ])

### ggplots later with context and type

barplot(loi$c550)

barplot(loi$c950)

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(tga$c550, tga$c950)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(tga$c950)
## $stats
## [1] 5.611934 6.118481 6.532557 7.126867 8.599873
## 
## $n
## [1] 11
## 
## $conf
## [1] 6.052174 7.012940
## 
## $out
## [1] 4.097341
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])

ggplot(tga, 
      aes(c550, c950, label = rownames(tga))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(tga$c550)

barplot(tga$c950)

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AA.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :51.37   Min.   :3.880   Min.   :15.92  
##  1st Qu.:57.73   1st Qu.:4.402   1st Qu.:16.49  
##  Median :58.79   Median :4.555   Median :16.93  
##  Mean   :58.18   Mean   :5.067   Mean   :17.32  
##  3rd Qu.:59.58   3rd Qu.:4.695   3rd Qu.:17.32  
##  Max.   :61.57   Max.   :8.670   Max.   :21.47
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)